/*
Script: feewaiver_analysis_afterRnR_replication

This file is part of the replication packet for "A Low-Cost Information Nudge Increases Citizenship Application Rates Among Low-Income Immigrants"

Purpose: This script analyzes the data from the 2017 fee waiver survey for the NaturalizeNY research project. 
The fee waiver 2017 group was in an experiment to measure the effect of informing people they were eligible for a fee waiver on their naturalization application rate. 

Input: 
NNYFeeWaiverReplicationData

Output: 
descript.tex
balcheck.tex
reply.tex
effects.tex
timeeffects.tex
effectssubsample1.tex
effectssubsample2.tex
landscape.tex
effectsMI.tex
effectsFWuse.tex
feewaivereffects.txt



*/                                                                                                

*** Session setup
* never pause the output with --more--, and start from an empty session
set more off
clear all

* directory for the data location (fill in before running;
* this global is not referenced again in this script)
global path ""


*** Where the data is read from and where the output is written
* both are blank placeholders that have to be filled in before running

global data_location   ""
global output_location ""


* load the analysis data set
use "$data_location/NNYFeeWaiverReplicationData", clear

* make the output location the working directory; every output file below
* is written with a relative file name
cd "$output_location"



* covariate sets: the limited set, the extensive set, and both combined
global covarslimited   "educ_HS educ_somecollege educ_BA gender_f plang_Eng plang_Span yrs_greencard_Q"
global covarsextensive "age_Q hhsize_Q hhinc_Q marital_married marital_single yrequired_f lang_Eng lang_Span origin_DR origin_China origin_Ecuador"
global covars          "$covarslimited $covarsextensive"


/* Table S1: Descriptive Statistics for Fee Waiver Information Study
Mean and standard deviation for selected variables.

Writes descript.tex by hand with -file write-: one row per variable holding
its variable label, mean and standard deviation (both formatted %9.2f). */


* variable groups in table order. Group 1 is printed without a heading row;
* the other groups get a heading and indented rows. There is no covars6.
global covars1 "submitted hhinc_Q hhsize_Q yrs_greencard_Q yrequired_f age_Q gender_f"
global covars2 "origin_DR origin_China origin_Ecuador"
global covars3 "marital_married marital_single"
global covars4 "educ_HS educ_somecollege educ_BA"
global covars5 "lang_Eng lang_Span plang_Eng plang_Span"
global covars7 "anyfeewaivernote feewaivernotice1"


* close a handle left open by an earlier, interrupted run (if any)
cap file close myfile
file open myfile using descript.tex, write replace
* shorthand used for every line written to the table
global WRITE file write myfile
$WRITE "\begin{table}[!hbt]" _n
$WRITE "  \centering" _n
$WRITE "  \small" _n
$WRITE "  \caption{Descriptive Statistics for Fee Waiver Information Study (N=1,537)}\label{descript}" _n
$WRITE "    \begin{tabular}{lcc}" _n
$WRITE "    \hline \hline" _n
$WRITE "          &    Mean & SD    \\" _n
$WRITE "\hline" _n

* one pass per variable group; this replaces six copies of the same loop
foreach g of numlist 1/5 7 {

    * heading row of the group (group 1 has none)
    if `g' == 2 {
        $WRITE "Origin: \\" _n
    }
    else if `g' == 3 {
        $WRITE "Marital Status: \\" _n
    }
    else if `g' == 4 {
        $WRITE "Highest Education: \\" _n
    }
    else if `g' == 5 {
        $WRITE "Language: \\" _n
    }
    else if `g' == 7 {
        $WRITE "Treatment Received: \\" _n
    }

    foreach dv of varlist ${covars`g'} {
        sum `dv'
        * locals instead of globals: the values are only needed for this row
        local rmean = `r(mean)'
        local rsd   = `r(sd)'
        local foo : variable label `dv'
        if `g' == 1 {
            $WRITE  " `foo' & "  %9.2f ( `rmean' ) " & " %9.2f ( `rsd' )  " \\" _n
        }
        else {
            * rows under a heading are indented with a thin space
            $WRITE  "$\,$ `foo' & "  %9.2f ( `rmean' ) " & " %9.2f ( `rsd' )  " \\" _n
        }
    }
}

$WRITE "    \hline \hline" _n
$WRITE "\multicolumn{3}{p{0.6\textwidth}}{\scriptsize \emph{Note}: Sample consists of legal permanent residents who registered for the naturalization program and are potentially eligible for the federal fee waiver because their household income is below 150 percent of the Federal Poverty Guidelines or they receive means-tested benefits.}" _n
$WRITE "    \end{tabular} " _n
$WRITE " \end{table} " _n
file close myfile



/* Table S2: Balance Checks
Each treatment indicator is regressed on the full covariate set, once for all
participants (models 1 and 3) and once for the participants who responded to
the follow-up survey (models 2 and 4). */

* eststo / estadd / esttab belong to the estout package, which stores
* estimation results and writes them out as LaTeX tables
eststo clear

foreach treat in anyfeewaivernote feewaivernotice1 {

    * all participants
    eststo: regress `treat' $covars, robust
    * joint test of all covariates; its p-value is attached to the stored model
    test $covars
    estadd scalar p = r(p)

    * participants with a non-missing survey outcome
    eststo: regress `treat' $covars if submitted != ., robust
    test $covars
    estadd scalar p = r(p)
}

esttab * using balcheck.tex, varlabels(_cons Constant) label ar2 scalar("F F-value" "p P-value") ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  nomtitle collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Fee Waiver Note (Any)" "Fee Waiver Note (First Registration)", pattern(1 0 1 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) ///
 title(Balance Checks \label{balcheck}) ///
 postfoot( ///
 \hline  ///
 \multicolumn{5}{p{5in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 F-values and P-values in the bottom rows are from omnibus F tests against the null that all slope coefficients are jointly equal to zero. ///
 Models 1 and 3 refer to the samples of all participants; models 2 and 4 to the samples of participants who responded to the follow-up survey.} ///
 \end{tabular} ///
 \end{table} ///
 )
 

/* Table S3: Survey Response Checks
Models 1 and 2 regress whether a participant responded to the survey on the
indicator for whether participants ever received information about the fee
waiver program; model 2 adds the covariates and all interactions of this
indicator with the covariates. Models 3 and 4 are instrumental variable
regressions where the indicator for whether participants ever received
information about the fee waiver program is instrumented by an indicator for
whether they received information about the fee waiver program during their
first registration.

Changes relative to the earlier version: typos in text that ends up in
reply.tex are corrected ("Adusted", "indiciator", "treatement", "received with
information"), and the table note now describes the Model 3 test as what the
code runs (a test on the treatment coefficient only; Model 3 contains no
interactions).
*/


* compute interactions of every covariate with both treatment indicators
*   XX<covariate> = anyfeewaivernote x covariate
*   ZZ<covariate> = feewaivernotice1 x covariate (instruments in model 4)
foreach x of global covars {
    local foo : variable label `x'
    gen XX`x' = anyfeewaivernote*`x'
    label var XX`x' "FW Note \$\times\$ `foo'"
    gen ZZ`x' = feewaivernotice1*`x'
    label var ZZ`x' "1st Note \$\times\$ `foo'"
}


eststo clear

* model 1: OLS, treatment only
eststo: reg replied anyfeewaivernote, robust
estadd local covars "No"
testparm anyfeewaivernote
estadd scalar pp = r(p)
estadd scalar pf = r(F)

* model 2: OLS, treatment, covariates and treatment x covariate interactions
eststo: reg replied anyfeewaivernote $covars XX*, robust
estadd local covars "Yes"
* joint test of the treatment and all its interactions
testparm anyfeewaivernote XX*
estadd scalar pp = r(p)
estadd scalar pf = r(F)

* model 3: 2SLS, treatment instrumented by the first-registration notice
eststo: ivregress 2sls replied (anyfeewaivernote=feewaivernotice1), robust
estadd local covars "No"
testparm anyfeewaivernote
estadd scalar pp = r(p)
estadd scalar pchi = r(chi2)

* model 4: 2SLS, treatment and interactions instrumented by the
* first-registration notice and its interactions
eststo: ivregress 2sls replied (anyfeewaivernote XX* = feewaivernotice1 ZZ*) $covars, robust
estadd local covars "Yes"
testparm anyfeewaivernote XX*
estadd scalar pp = r(p)
estadd scalar pchi = r(chi2)

esttab * using reply.tex, varlabels(_cons Constant) label prehead( ///
\begin{table}[htbp]\centering \tiny ///
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi} ///
\caption{Survey Response Checks for Fee Waiver Study \label{reply}} ///
\begin{tabular}{l*{4}{c}} ///
\hline \hline ) ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  nomtitle collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Responded to Survey", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(covars N r2_a pf pchi pp, fmt(2 "" 0 "" 3) label("Covariates" "Observations" "Adjusted R$^2$" "F-value" "Chi-Square" "P-value"))  ///
 title(Survey Response Checks  \label{replyFeeWaiver}) ///
 postfoot( ///
 \hline  ///
 \multicolumn{5}{p{3.75in}}{\tiny ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Models 1 and 2 regress whether a participant responded to the survey on the indicator for whether participants ever received information about the fee waiver program plus all interactions of this indicator with the covariates. ///
 Models 3 and 4 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program is instrumented by an indicator for whether they received information ///
 about the fee waiver program during their first registration. For Model 4 the interactions  ///
between the covariates and the treatment are also instrumented by the interactions between receiving information about the fee waiver program during the first registration and the covariates. ///
 The Chi-Square test for Model 3 tests the null that the coefficient on the treatment is equal to zero. The Chi-Square test for Model 4 is a joint ///
 significance test against the null that the coefficients on the treatment and the interactions of the covariates with the treatment are jointly equal to zero.} ///
 \end{tabular} ///
 \end{table} ///
 )

 
/* Table S4: Effect Estimates
Models 1-3 regress the outcome on the indicator for whether participants ever received information about the fee waiver program.
Models 4-6 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program is instrumented
by whether they received information about the fee waiver program the first time they completed the registration. The models provide regressions
on just the treatment variables, the limited covariate set, and the extensive covariate set.

Besides effects.tex, this section builds the matrix res (point estimate,
standard error and N, in percentage points) that is saved to
feewaivereffects.txt for the R figure.

Fix: the N column used to carry the model-1 sample size on every row. Each
"Effect" row now reports the estimation sample of its own model, which can be
smaller when covariates have missing values. */

eststo clear

* start the results matrix for the R figure from scratch
capture matrix drop res

* covariate specification and table label of models 1-3 (and 4-6)
local spec1 ""
local spec2 "$covarslimited"
local spec3 "$covars"
local tag1  "No"
local tag2  "Limited Set"
local tag3  "Extensive Set"

* models 1-3: OLS
forvalues m = 1/3 {
    eststo: reg submitted anyfeewaivernote `spec`m'', robust
    estadd local covars "`tag`m''"

    * sample size of THIS model, stored next to its estimates
    mat n1 = e(N)

    if `m' == 1 {
        * control mean
        lincom _cons*100
        mat res1 = r(estimate) , r(se) , n1
        mat rownames res1 = "Control"
        mat res = nullmat(res) \ res1

        * treatment mean
        lincom (_cons + anyfeewaivernote)*100
        mat res1 = r(estimate) , r(se) , n1
        mat rownames res1 = "Treated"
        mat res = nullmat(res) \ res1
    }

    * delta: treatment effect of model m
    lincom anyfeewaivernote*100
    mat res1 = r(estimate) , r(se) , n1
    mat rownames res1 = "Effect `m'"
    mat res = nullmat(res) \ res1
}

* models 4-6: 2SLS with the first-registration notice as instrument
forvalues m = 1/3 {
    eststo: ivregress 2sls submitted (anyfeewaivernote=feewaivernotice1) `spec`m'', robust
    estadd local covars "`tag`m''"
}

matrix colnames res = "pe" "se" "N"
matlist res
mat2txt, matrix(res) saving("feewaivereffects.txt") replace

esttab * using effects.tex, varlabels(_cons Constant) label ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  nomtitle collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Applied for Naturalization", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(covars N,fmt(2 "" 0) label("Covariates" "Observations"))  ///
 title(Effect Estimates \label{effects}) ///
 postfoot( ///
 \hline  ///
 \multicolumn{7}{p{5.2in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Models 1-3 regress the outcome on the indicator for whether participants ever received information about the fee waiver program.  ///
 Models 4-6 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program ///
 is instrumented by whether they received information about the fee waiver ///
 program the first time they completed the registration.} ///
 \end{tabular} ///
 \end{table} ///
 )
 
 
 
/* Table S5: Effect Estimates (Controlling for Time of Registration)
Models 1-3 regress the outcome on the indicator for whether participants ever received information about the fee waiver program.
Models 4-6 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program is instrumented
by whether they received information about the fee waiver program the first time they completed the registration. The models provide regressions
on just the treatment variables, the limited covariate set plus a time-since-registration variable, the extensive covariate set plus a time-since-registration
variable.

Fix: the table used to be written with \label{effects}, the same label that
the effects.tex table carries, which makes the label multiply defined when both
tables are included in one document. It is now \label{timeeffects}. The
sample-size matrix that was stored after model 1 but never used in this
section has been removed. */


eststo clear

* model 1: OLS, treatment only
eststo: reg submitted anyfeewaivernote, robust
estadd local covars "No"

* model 2: OLS, limited covariates + time between registration and survey
eststo: reg submitted anyfeewaivernote $covarslimited reg_survey_time, robust
estadd local covars "Limited Set"

* model 3: OLS, all covariates + time between registration and survey
eststo: reg submitted anyfeewaivernote $covars reg_survey_time, robust
estadd local covars "Extensive Set"

* model 4: 2SLS, treatment only
eststo: ivregress 2sls submitted (anyfeewaivernote=feewaivernotice1), robust
estadd local covars "No"

* model 5: 2SLS, limited covariates + time
eststo: ivregress 2sls submitted (anyfeewaivernote=feewaivernotice1) $covarslimited reg_survey_time, robust
estadd local covars "Limited Set"

* model 6: 2SLS, all covariates + time
eststo: ivregress 2sls submitted (anyfeewaivernote=feewaivernotice1) $covars reg_survey_time, robust
estadd local covars "Extensive Set"


esttab * using timeeffects.tex, varlabels(_cons Constant) label ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  nomtitle collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Applied for Naturalization", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(covars N,fmt(2 "" 0) label("Covariates" "Observations"))  ///
 title(Effect Estimates (Controlling for Time of Registration) \label{timeeffects}) ///
 postfoot( ///
 \hline  ///
 \multicolumn{7}{p{5.2in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Models 1-3 regress the outcome on the indicator for whether participants ever received information about the fee waiver program.  ///
 Models 4-6 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program ///
 is instrumented by whether they received information about the fee waiver ///
 program the first time they completed the registration. This table includes an additional covariate ///
 for the time between registration and survey response. This variable was not pre-specified so is included ///
 in a separate regression table. The addition of this variable does not change the results. } ///
 \end{tabular} ///
 \end{table} ///
 )
 
 
 
 

 
 /* Table S6: Subgroup Effects 1-7
Treatment-only regressions with robust standard errors, one per subsample.
Model 1: males. Model 2: females. Model 3: less than a high school education.
Model 4: no education beyond high school. Model 5: at least some college
education. Model 6: registered using the English-language version of the
registration. Model 7: registered using the Spanish-language version of the
registration.

After every model one row (effect in percentage points, its standard error,
and the model's N) is appended to the matrix res, which already holds the rows
added before this section. */


eststo clear

* models 1-2: gender subsamples (gender_f: 0 = male, 1 = female)
foreach i of numlist 0 1 {
    eststo: regress submitted anyfeewaivernote if gender_f == `i', robust
    if `i' == 0 {
        estadd local modelname = "Male"
    }
    else {
        estadd local modelname = "Female"
    }

    matrix n1 = e(N)
    lincom anyfeewaivernote*100
    matrix res1 = r(estimate) , r(se) , n1
    matrix rownames res1 = "Gender `i'"
    matrix res = nullmat(res) \ res1
}

* models 3-5: education subsamples

* model 3
eststo: regress submitted anyfeewaivernote if NoHSDiplom == 1, robust
estadd local modelname = "\textless High School"
matrix n1 = e(N)
lincom anyfeewaivernote*100
matrix res1 = r(estimate) , r(se) , n1
matrix rownames res1 = "\textless High School"
matrix res = nullmat(res) \ res1

* model 4
eststo: regress submitted anyfeewaivernote if HSorless == 1, robust
estadd local modelname = "\textless College"
matrix n1 = e(N)
lincom anyfeewaivernote*100
matrix res1 = r(estimate) , r(se) , n1
matrix rownames res1 = "\textless College"
matrix res = nullmat(res) \ res1

* model 5
eststo: regress submitted anyfeewaivernote if College == 1, robust
estadd local modelname = "Some College or More"
matrix n1 = e(N)
lincom anyfeewaivernote*100
matrix res1 = r(estimate) , r(se) , n1
matrix rownames res1 = "Some College or More"
matrix res = nullmat(res) \ res1


* models 6-7: language subsamples

* model 6
eststo: regress submitted anyfeewaivernote if lang_Eng == 1, robust
estadd local modelname = "English"
matrix n1 = e(N)
lincom anyfeewaivernote*100
matrix res1 = r(estimate) , r(se) , n1
matrix rownames res1 = "Survey Eng"
matrix res = nullmat(res) \ res1

* model 7
eststo: regress submitted anyfeewaivernote if lang_Span == 1, robust
estadd local modelname = "Spanish"
matrix n1 = e(N)
lincom anyfeewaivernote*100
matrix res1 = r(estimate) , r(se) , n1
matrix rownames res1 = "Survey Spanish"
matrix res = nullmat(res) \ res1

* show everything collected so far
matlist res



 * write the table for models 1-7
esttab * using effectssubsample1.tex, varlabels(_cons Constant) nomtitles ///
label prehead( ///
\begin{table}[htbp]\centering \scriptsize ///
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi} ///
\caption{Effect Estimates of the Information Treatment by Subsample (Models 1 - 7)\label{effectsSub1}} ///
\begin{tabular}{l*{8}{c}} ///
\hline \hline ) ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Applied for Naturalization", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(N modelname,fmt(0) label("Observations" "Subsample"))  ///
 title(Effect Estimates of the Information Treatment by Subsample (Models 1 - 7)) ///
 postfoot( ///
 \hline  ///
 \multicolumn{8}{p{6.2in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Model 1 is the subsample of respondents that are males. ///
 Model 2 is the subsample of respondents that are females. ///
 Model 3 is the subsample of respondents that received less than a high school education. ///
 Model 4 is the subsample of respondents that did not receive any education greater than high school. ///
 Model 5 is the subsample of respondents that received at least some college education. ///
 Model 6 is the subsample of respondents that registered for the program using an English-language version of the registration. ///
 Model 7 is the subsample of respondents that registered for the program using a Spanish-language version of the registration.  } ///
 \end{tabular} ///
 \end{table} ///
 )
 
 
 
 
 /* Table S7: Subgroup Effects 2
Regression coefficients shown with robust standard errors in parentheses.
Model 8 is the subsample of respondents that are at or below the median household income of $7,200. Model 9 is the subsample of
respondents that are above the median household income of $7,200. Model 10 is the subsample of respondents that are at or below the median age
of 33. Model 11 is the subsample of respondents that are above the median age of 33. Model 12 is the subsample of respondents that are from the
Dominican Republic.

Fix: the above-median indicators are now left missing when the underlying
variable is missing. Stata treats a missing value as larger than any number, so
"x > median" is true for missing x; without the guard, respondents with unknown
income or age were placed in the "above median" subsample.
NOTE(review): if hhinc_cap or age contain missing values, models 9 and 11 (and
the interaction regression below) change relative to earlier runs; confirm
against the reported table. */


eststo clear


* household income subsample: 0 = at or below the median, 1 = above the median

sum hhinc_cap, detail
gen hhinc_capCat = hhinc_cap > r(p50) if !missing(hhinc_cap)

foreach i of numlist 0 1 {
    eststo: reg submitted anyfeewaivernote if hhinc_capCat == `i', robust

    if `i' == 0 {
        estadd local modelname = "Low income"
    }
    if `i' == 1 {
        estadd local modelname = "High income"
    }
    mat n1 = e(N)

    * effect in percentage points, appended to the results matrix
    lincom anyfeewaivernote*100
    mat res1 = r(estimate) , r(se) , n1
    mat rownames res1 = "HHInc `i'"
    mat res = nullmat(res) \ res1
}
* treatment x income-group interaction; displayed only, not stored in a table
reg submitted anyfeewaivernote##hhinc_capCat, robust


* age subsample: 0 = at or below the median, 1 = above the median
sum age, detail

gen ageCat = age > r(p50) if !missing(age)

foreach i of numlist 0 1 {
    eststo: reg submitted anyfeewaivernote if ageCat == `i', robust
    if `i' == 0 {
        estadd local modelname = "Younger"
    }
    if `i' == 1 {
        estadd local modelname = "Older"
    }
    mat n1 = e(N)
    lincom anyfeewaivernote*100
    mat res1 = r(estimate) , r(se) , n1
    mat rownames res1 = "Age `i'"
    mat res = nullmat(res) \ res1
}

* origin subsample

eststo: reg submitted anyfeewaivernote if origin_DR == 1, robust
estadd local modelname = "Dominican Republic"
mat n1 = e(N)
lincom anyfeewaivernote*100
mat res1 = r(estimate) , r(se) , n1
mat rownames res1 = "Dominican Republic"
mat res = nullmat(res) \ res1

* save the accumulated results matrix; this replaces any existing
* feewaivereffects.txt
matlist res
matrix colnames res = "pe" "se" "N"
mat2txt, matrix(res) saving("feewaivereffects.txt") replace


 * table for subsample
esttab * using effectssubsample2.tex, varlabels(_cons Constant) nomtitles ///
label prehead( ///
\begin{table}[htbp]\centering \scriptsize ///
\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi} ///
\caption{Effect Estimates of the Information Treatment by Subsample (Models 8 - 12) \label{effectsSub2}} ///
\begin{tabular}{l*{7}{c}} ///
\hline \hline ) ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Applied for Naturalization", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(N modelname,fmt(0) label("Observations" "Subsample"))  ///
 title(Effect Estimates of the Information Treatment by Subsample) ///
 postfoot( ///
 \hline  ///
 \multicolumn{6}{p{6.2in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Model 8 is the subsample of respondents that are at or below the median household income of \\$7,200. ///
 Model 9 is the subsample of respondents that are above the median household income of \\$7,200. ///
 Model 10 is the subsample of respondents that are at or below the median age of 33. ///
 Model 11 is the subsample of respondents that are above the median age of 33. ///
 Model 12 is the subsample of respondents that are from the Dominican Republic.} ///
 \end{tabular} ///
 \end{table} ///
 )
 




/* Table S8: Effect Estimates (Multiple Imputation)
Same six specifications as the main effect table, estimated with -mi estimate-
on 10 imputed data sets: models 1-3 are OLS regressions of the outcome on the
indicator for whether participants ever received information about the fee
waiver program; models 4-6 instrument that indicator with whether they
received the information the first time they completed the registration. */

* fixed row order, so the imputations come out the same on every run
sort unique_id

* declare the mi structure and impute by chained predictive mean matching
mi set flong
mi xtset, clear
mi register imputed $covars submitted feewaivernotice1 anyfeewaivernote
mi impute chained (pmm, knn(5)) $covars submitted feewaivernotice1 anyfeewaivernote,  add(10) rseed(42)

* fit models
eststo clear

* results matrix for the R figure is rebuilt from scratch
capture matrix drop res

* covariate specification and table label shared by models 1-3 and 4-6
local spec1 ""
local spec2 "$covarslimited"
local spec3 "$covars"
local tag1  "No"
local tag2  "Limited Set"
local tag3  "Extensive Set"

* models 1-3: OLS
forvalues m = 1/3 {
    mi estimate, cmdok post: regress submitted anyfeewaivernote `spec`m'', robust
    eststo
    estadd local covars "`tag`m''"

    if `m' == 1 {
        * N of model 1 is the one carried on every row of res
        matrix n1 = e(N)

        * control mean
        lincom _cons*100
        matrix res1 = r(estimate) , r(se) , n1
        matrix rownames res1 = "Control"
        matrix res = nullmat(res) \ res1

        * treatment mean
        lincom (_cons + anyfeewaivernote)*100
        matrix res1 = r(estimate) , r(se) , n1
        matrix rownames res1 = "Treated"
        matrix res = nullmat(res) \ res1
    }

    * delta of model m
    lincom anyfeewaivernote*100
    matrix res1 = r(estimate) , r(se) , n1
    matrix rownames res1 = "Effect `m'"
    matrix res = nullmat(res) \ res1
}

* models 4-6: 2SLS
forvalues m = 1/3 {
    mi estimate, cmdok post: ivregress 2sls submitted (anyfeewaivernote=feewaivernotice1) `spec`m'', robust
    eststo
    estadd local covars "`tag`m''"
}

esttab * using effectsMI.tex, varlabels(_cons Constant) label ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  nomtitle collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Applied for Naturalization", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(covars N,fmt(2 "" 0) label("Covariates" "Observations"))  ///
 title(Effect Estimates (Multiple Imputation) \label{effectsMI}) ///
 postfoot( ///
 \hline  ///
 \multicolumn{7}{p{4.2in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Models 1-3 regress the outcome on the indicator for whether participants ever received information about the fee waiver program.  ///
 Models 4-6 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program ///
 is instrumented by whether they received information about the fee waiver ///
 program the first time they completed the registration.  All analyses are based on multiple imputation using 10 imputed datasets. See text for details.     } ///
 \end{tabular} ///
 \end{table} ///
 )
 

/* Table S10: Effect of Fee Waiver on Usage
Models 1-3 regress fee waiver usage on the indicator for whether participants
ever received information about the fee waiver program. Models 4-6 are
instrumental variable regressions where that indicator is instrumented by
whether they received the information the first time they completed the
registration. All analyses are based on multiple imputation using 10 imputed
datasets.
*/

* reload the data file: a new set of imputations is created below, so the
* imputed data currently in memory has to go
use "$data_location/NNYFeeWaiverReplicationData", clear

* keep only observations with a recorded outcome
drop if submitted == .

* snapshot of the un-imputed data
preserve

* multiple imputation
mi set flong
mi xtset, clear
mi register imputed $covars submitted anyfeewaivernote usefw
mi impute chained (pmm, knn(5)) $covars submitted anyfeewaivernote usefw ,  add(10) rseed(42)

* clearing stored estimates
eststo clear

* covariate specification and table label shared by models 1-3 and 4-6
local spec1 ""
local spec2 "$covarslimited"
local spec3 "$covars"
local tag1  "No"
local tag2  "Limited Set"
local tag3  "Extensive Set"

* models 1-3: OLS
forvalues m = 1/3 {
    mi estimate, cmdok post: regress usefw anyfeewaivernote `spec`m'', robust
    eststo
    estadd local covars "`tag`m''"
    if `m' == 1 {
        * sample size of model 1
        matrix n1 = e(N)
    }
}

* models 4-6: 2SLS
forvalues m = 1/3 {
    mi estimate, cmdok post: ivregress 2sls usefw (anyfeewaivernote=feewaivernotice1) `spec`m'', robust
    eststo
    estadd local covars "`tag`m''"
}



esttab * using effectsFWuse.tex, varlabels(_cons Constant) label ///
 cells(b(fmt(%9.3f)) se(par)) noomitted  nomtitle collabels(none) nobaselevels  varwidth(50) replace ///
 mgroups("Used Fee Waiver for Naturalization Application", pattern(1 0 0 0) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span})) stats(covars N,fmt(2 "" 0) label("Covariates" "Observations"))  ///
 title(Effect of Fee Waiver Notice on Fee Waiver Usage \label{effectsFWuse}) ///
 postfoot( ///
 \hline  ///
 \multicolumn{7}{p{5.2in}}{\scriptsize ///
 \emph{Note}: Regression coefficients shown with robust standard errors in parentheses. ///
 Models 1-3 regress fee waiver usage on the indicator for whether participants ever received information about the fee waiver program.  ///
 Models 4-6 are instrumental variable regressions where the indicator for whether participants ever received information about the fee waiver program ///
 is instrumented by whether they received information about the fee waiver ///
 program the first time they completed the registration. All analyses are based on multiple imputation using 10 imputed datasets.} ///
 \end{tabular} ///
 \end{table} ///
 )

 
 
 
 
* substitution effect - Estimating the Substitution Effects of the Fee Waiver Notice
* These calculations derive the estimates used in the equation
*     B_AN = (B_Overall - Pi_CN) / Pi_AN
* All three inputs are estimated on the imputed data currently in memory.
* The treatment variable is written out in full (it used to be abbreviated),
* so the code also runs when variable abbreviation is switched off.

* ATE - the average effect of the fee waiver notice on the fee waiver usage (B_Overall)
mi estimate, cmdok post: reg usefw anyfeewaivernote
scalar ATE = _b[anyfeewaivernote]

* share of always takers (Pi_AN): application rate among those never shown the notice
mi estimate, cmdok post: reg submitted if anyfeewaivernote==0
scalar AT = _b[_cons]

* share of conditional naturalizers (Pi_CN): effect of the notice on applying
mi estimate, cmdok post: reg submitted anyfeewaivernote
scalar CO = _b[anyfeewaivernote]


* Fee waiver effect on fee waiver usage for always naturalizers (B_AN)
scalar ATE_AT = (ATE - CO)/AT

* Ratio = change in always naturalizers compared to change in conditional naturalizers
* (both scaled to 1000 registrants; the factor cancels)
scalar Ratio =  (AT*1000*ATE_AT) / (1000*CO)

* listing the results
scalar list ATE CO AT ATE_AT Ratio

* back to the data as it was when -preserve- was issued
restore

* bootstrapping the confidence interval for the B_AN
* this bootstrap takes a considerable amount of time to run
*
* Each replication resamples the data in memory with replacement, redoes the
* multiple imputation, and recomputes ATE, CO, AT, ATE_AT and Ratio; the 800
* rows are collected in the matrix res1 and summarized at the end.
*
* Fix: the imputation used to be called with rseed(42) inside the loop. That
* re-seeds the random-number generator in every replication, so the -bsample-
* draw of the next replication started from a generator state fixed by the
* same seed rather than continuing one random stream. The seed is now set
* once, before the loop, which keeps the run reproducible and lets resampling
* and imputation draw from a single stream.
* NOTE(review): this changes the bootstrap draws, so the resulting interval
* will differ numerically from earlier runs.

* need a large matrix size to store values
* 800 is the maximum matrix size for some versions of STATA
set matsize 800

* NOTE: -exit- ends the do-file here, so nothing below runs in a normal
* execution. Comment it out to run the bootstrap.
exit

cap matrix drop res1

* one seed for the whole bootstrap
set seed 42

forvalues i = 1/800 {

    preserve

    * bootstrap sample of the same size as the data
    bsample _N

    * multiple imputation (no rseed(): see the note at the top of this section)
    qui: mi set flong
    qui: mi xtset, clear
    qui: mi register imputed $covars submitted anyfeewaivernote usefw
    qui: mi impute chained (pmm, knn(5)) $covars submitted anyfeewaivernote usefw ,  add(10)

    * ATE (B_Overall)
    qui: mi estimate, cmdok post: reg usefw anyfeewaivernote
    scalar ATE = _b[anyfeewaivernote]

    * share of ATs (Pi_AN)
    qui: mi estimate, cmdok post: reg submitted if anyfeewaivernote==0
    scalar AT = _b[_cons]

    * share of conditional naturalizers (Pi_CN)
    qui: mi estimate, cmdok post: reg submitted anyfeewaivernote
    scalar CO = _b[anyfeewaivernote]

    * Fee waiver effect on fee waiver usage for always naturalizers (B_AN)
    scalar ATE_AT = (ATE - CO)/AT

    scalar Ratio =  (AT*1000*ATE_AT) / (1000*CO)

    * one row per replication
    matrix temp = ATE , CO , AT , ATE_AT, Ratio
    matrix res1 = nullmat(res1) \ temp

    restore
}


matlist res1
* svmat takes a matrix and stores its columns as new variables (res11-res15)
qui: svmat res1
sum res*

* renaming the output so it can be understood

rename res11 xATE
rename res12 xCO
rename res13 xAT
rename res14 xATE_AT
rename res15 xRatio

summarize x*

* tabstat creates a compact table of summary statistics
tabstat x* , s(n mean p50 p5 p95)

* 95% percentile interval: r(r1) is the 2.5th and r(r2) the 97.5th percentile
foreach v in xATE_AT xRatio {
    _pctile `v', p(2.5 97.5)
    return list
}


